package mitlab.seg.crf_seg.util;

import java.util.Map;
import mitlab.seg.ner.domain.Term;
import mitlab.seg.ner.library.NatureLibrary;
import mitlab.seg.ner.library.NgramLibrary;

/**
 * Static numeric helpers used to score terms during segmentation.
 */
public class MathUtil {

  /** Smoothing parameter: weight given to the single-term component in {@link #compuScore}. */
  private static final double SMOOTHING_PARAM = 0.1;

  /** Separator placed between the two term names when building a relation-map key. */
  private static final String KEY_SEPARATOR = "\t";

  /** Frequency constant used both as a normalizer and as a penalty (original note: 7528283+329805). */
  private static final int MAX_FREQUENCE = 2079997;

  /** Reciprocal of {@link #MAX_FREQUENCE}; mixed into the two-word frequency component. */
  private static final double INVERSE_MAX_FREQUENCE = 1.0 / MAX_FREQUENCE;

  /**
   * Computes the accumulated score of moving from one term to the next.
   *
   * <p>If the preceding term's total frequency plus one is negative, {@link #MAX_FREQUENCE} is
   * added to the preceding term's score, the new score is written back onto {@code from}, and that
   * score is returned. Otherwise the negative natural log of a smoothed mix of the single-term and
   * two-word frequencies is added to the preceding term's score; when that log value is negative
   * the frequency is added to it first.
   *
   * @param from the preceding term
   * @param to the following term
   * @param relationMap optional extra two-word frequencies keyed by
   *        {@code from name + TAB + to name}; may be {@code null}
   * @return the score of {@code from} plus the computed transition value
   */
  public static double compuScore(Term from, Term to, Map<String, Double> relationMap) {
    double frequency = from.termNatures().allFreq + 1;

    if (!(frequency < 0)) {
      double pairFreq = NgramLibrary.getTwoWordFreq(from, to);

      if (relationMap != null) {
        String pairKey = from.getName() + KEY_SEPARATOR + to.getName();
        Double extraFreq = relationMap.get(pairKey);
        if (extraFreq != null) {
          pairFreq += extraFreq;
        }
      }

      // Same operation order as a single combined expression, so rounding is unchanged.
      double singleTermPart = SMOOTHING_PARAM * frequency / (MAX_FREQUENCE + 80000);
      double pairPart = (1 - SMOOTHING_PARAM)
          * ((1 - INVERSE_MAX_FREQUENCE) * pairFreq / frequency + INVERSE_MAX_FREQUENCE);
      double value = -Math.log(singleTermPart + pairPart);

      if (value < 0) {
        value += frequency;
      }
      return from.score() + value;
    }

    // Negative frequency: penalize the preceding term and persist the penalized score on it.
    double penalized = from.score() + MAX_FREQUENCE;
    from.score(penalized);
    return penalized;
  }

  /**
   * Adds the total frequencies of two terms.
   *
   * @param from the first term
   * @param term the second term
   * @return the sum of both terms' {@code allFreq} values
   */
  public static double compuScoreFreq(Term from, Term term) {
    return from.termNatures().allFreq + term.termNatures().allFreq;
  }

  /**
   * Sums the negative natural logarithms of the given values.
   *
   * @param fs the values to take the logarithm of
   * @return the sum of {@code -Math.log(f)} over every value; {@code 0} when no values are given
   */
  public static double lnSum(float... fs) {
    double total = 0;
    for (float f : fs) {
      total += -Math.log(f);
    }
    return total;
  }

}
